1/ Introduction

This file is a supplementary data attached with the publication concerning the genetic determinism of Durum Wheat to the Fusarium head blight. It aims to describe the genetic map provided in this paper. The script allowing to build the genetic map is in the folder 1_Partie_Bioinfo.

Let’s upload this genetic map:

#Watch out, to reproduct analysis, you have to update the path.
map=read.table("/Users/yan/Dropbox/Publi_Fusariose/ANALYSIS_REPRO/DATA/map_avec_posi_physique.txt" , header=T )

# number of markers
nmark=nrow(map)

This genetic map is composed of 14316 markers. Putative physical positions of markers are available.

Charge some libraries that will be useful

library(RColorBrewer)
library(xtable)
library(tidyverse)
library(rmdformats)
library(plotly)
library(knitr)

2/ Map description

Summary table

Basic statistics are computed for every chromosomes of the genetic map, then for the A and B genomes, and finally for the whole genetic map. Results are presented in the table below:

#Let's create a function that calculate some basic statistics for a piece of map
my_fun=function(my_map){
    num=nrow(bilan)
    num=num+1
    bilan[num,1]=i
    bilan[num,2]=nrow(my_map)
    bilan[num,3]=max(my_map[,3])
    gaps= sort(my_map[,3])[-1] - sort(my_map[,3])[-length(my_map[,3])]
    bilan[num,4]=mean(gaps)
    bilan[num,5]=max(gaps)
    bilan[num,6]=round(nrow(unique(my_map[,c(1,3)])),0)
    return(bilan)
    }

#Let's apply this function on our map, chromosome by chromosome and for the whole map:

# summary table that we are going to fill
bilan=data.frame(matrix(0,0,6)) ; num=0
colnames(bilan)=c("Chromo","nbr_marker","size_in_cM","average_gap","biggest_gap","nb_uniq_pos")

# apply the function to every chromosome
for(i in levels(map$group)){
    map_K=map[map$group==i,]
    bilan=my_fun(map_K)
    }
# then to A and B genomes
for(i in c("A" , "B")){
    map_K=map[substr(map$group , 2 , 2)==i , ]
    bilan=my_fun(map_K)
    }
# then to the whole map
i="tot"
bilan=my_fun(map)
#print(xtable(bilan), type = "html", include.rownames = F , comment=FALSE)
kable(xtable(bilan))
Chromo nbr_marker size_in_cM average_gap biggest_gap nb_uniq_pos
1A 721 175.4 0.2436111 15.2 122
1B 1227 192.9 0.1573409 9.2 154
2A 1195 222.3 0.1861809 12.4 165
2B 1423 243.8 0.1714487 12.2 190
3A 733 212.2 0.2898907 7.0 137
3B 1221 241.0 0.1975410 14.5 187
4A 969 224.3 0.2317149 13.0 151
4B 1033 162.3 0.1572674 16.7 124
5A 787 303.5 0.3861323 10.2 190
5B 1180 259.2 0.2198473 9.3 182
6A 803 190.6 0.2376559 6.5 136
6B 1101 185.5 0.1686364 12.6 153
7A 1187 239.4 0.2018550 9.5 177
7B 736 214.5 0.2918367 8.5 130
A 6395 303.5 0.0474664 9.2 1078
B 7921 259.2 0.0327273 9.3 1120
tot 14316 303.5 0.0212015 9.2 2198

This table is saved as a supplementary material for the publication.

write.table(bilan, "../../../SUPPORTING_DATA/OR_map_feature.csv", sep=";", quote=F, row.names = F)

Markers distribution (plot)

ggplot(map, aes(y=position, x=group, color=ref)) + 
  geom_point() +
  scale_y_reverse() 

3/ Genetic vs Physical positions

For most of the SNPs, a physical position is available trough the Ensembl database. It is interesting to compare the physical and the genetic positions of markers when both informations are available.

Marey Map

This is the “marey map” representation. We will produce a figure for the paper with this code.

p=map %>% 
  filter(group==group_phy) %>% 
  ggplot(aes(x=position_phy, y=position, color=ref, text=marker)) + 
    geom_point(size=0.5, alpha=0.5) + 
    facet_wrap(~group, scales="free") + 
    theme(legend.position="none", axis.text=element_blank() , axis.ticks= element_blank() 
    )
ggplotly(p)

Save this figure as a supplementary material

png("../../../SUPPORTING_DATA/OR_marey_map.png")
p
dev.off()
## quartz_off_screen 
##                 2

Summary statistics

Let’s calculate the spearman correlation between physical and genetic positions for each chromosome?

tb=map %>%
  filter(group==group_phy & !is.na(group)) %>% 
    group_by(group) %>%
    summarize( 
      cor_spearman=cor(position, position_phy, use="complete.obs", method="spearman") %>% round(2),
      nvalue=length(position)
      )
kable(xtable(tb))
group cor_spearman nvalue
1A 1.00 644
1B 1.00 1099
2A 0.99 1074
2B 1.00 1312
3A 1.00 685
3B 1.00 1103
4A 0.99 883
4B 1.00 963
5A 0.99 705
5B 1.00 1083
6A 1.00 739
6B 1.00 969
7A 1.00 1076
7B 1.00 691

Recombination events

Do we have markers that have different chromosome attributions? Apparently we have some:
- between 7A and 4A

adjacency=table(map$group, map$group_phy)
kable(xtable(adjacency))
1A 1B 2A 2B 3A 3B 4A 4B 5A 5B 6A 6B 7A 7B
1A 644 30 0 1 1 0 1 0 0 0 0 0 0 3
1B 57 1099 0 1 0 0 0 0 0 0 0 0 0 2
2A 0 0 1074 71 1 0 0 0 0 2 0 0 0 0
2B 0 0 59 1312 0 3 0 0 1 0 0 0 0 0
3A 1 0 0 0 685 28 0 6 0 0 0 0 0 0
3B 0 1 0 0 50 1103 0 0 0 0 0 0 0 2
4A 0 0 0 0 1 0 883 36 0 0 1 0 8 1
4B 0 0 0 0 0 0 38 963 8 0 0 0 1 0
5A 0 0 0 0 1 0 1 6 705 49 0 0 0 0
5B 0 1 0 0 0 0 7 0 58 1083 0 8 0 0
6A 0 0 0 0 0 0 0 0 0 0 739 35 2 0
6B 0 2 0 0 0 1 1 1 0 1 49 969 0 0
7A 0 1 0 0 0 0 32 1 0 0 0 2 1076 34
7B 0 0 0 0 0 0 0 0 0 1 0 1 28 691
diag(adjacency)=0
p=adjacency %>% 
  as.data.frame() %>% 
  rename(GeneticPosition=Var1, PhysicalPosition=Var2, NumberOfMarkers=Freq) %>% 
  ggplot( aes(x=GeneticPosition, y=PhysicalPosition, z=NumberOfMarkers, fill=NumberOfMarkers)) +
    geom_tile() + 
    theme_bw()
ggplotly(p)